## Replication code for Section 4 of the Supplementary Information for:
##  Abrajano, Elmendorf, and Quinn.
##  "Labels vs. Pictures: Treatment Mode Effects in Experiments About
##   Discrimination." Political Analysis. 
##
## Marisa Abrajano, Christopher Elmendorf, and Kevin Quinn
## 9/24/2017
##

library(lmtest)
library(sandwich)
library(msm)
library(ggplot2)

## function that does clustered SEs
## (from Hainmueller, Hopkins, & Yamamoto)
##
## Computes a cluster-robust (sandwich) covariance matrix for a fitted model.
##
## Arguments:
##   model   - fitted model object (e.g. from lm()); it must work with
##             model.matrix(), sandwich::estfun() and sandwich::sandwich(),
##             and carry a `rank` element.
##   cluster - vector of cluster identifiers, one per row of
##             model.matrix(model), in the same row order.
##
## Returns:
##   the sandwich covariance matrix built from cluster-summed scores,
##   multiplied by the finite-sample factor (M/(M-1)) * ((N-1)/(N-K)), where
##   M = number of clusters, N = number of observations, K = model rank.
##
## Errors if `cluster` has a different length than the model's data or if it
## contains missing values; warns if there are fewer than 50 clusters.
vcovCluster <- function(model, cluster){
  if(nrow(model.matrix(model))!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  ## a missing cluster id would be counted as a cluster in M while its scores
  ## could not be assigned to any real cluster, so refuse to continue
  if(anyNA(cluster)){
    stop("check your data: cluster variable contains missing values")
  }
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- model$rank
  if(M<50){
    warning("Fewer than 50 clusters, variances may be unreliable (could try block bootstrap instead).")
  }
  ## finite-sample degrees-of-freedom correction
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  ## sum the observation-level scores within each cluster; rowsum() groups by
  ## the values actually present, so unused factor levels in `cluster`
  ## cannot introduce NA rows
  uj <- rowsum(sandwich::estfun(model), cluster)
  dfc * sandwich::sandwich(model, meat = crossprod(uj)/N)
}


## load the long (stacked) data
## (the code below expects these files to provide mydata.p.long and
##  mydata.w.long)
load("./PhotosLong-ALL.Rda")
load("./WordsLong-ALL.Rda")

## convert choice variable into numeric (1 if choice is "CandA", 0 otherwise)
mydata.w.long$choice <- as.numeric(mydata.w.long$choice == "CandA")
mydata.p.long$choice <- as.numeric(mydata.p.long$choice == "CandA")


###########################################################################
## Hypothesis 1 (and 4 and 5)

## estimate tau (effect of text ethnicity on choice for full sample)
## note: na.omit() drops every row that has a missing value in ANY column
mydata.sub <- na.omit(mydata.w.long)
lm.tau <- lm(choice ~ candA.ethnicity, weights = weight, data = mydata.sub)
## standard errors clustered on respondent
tab.tau <- coeftest(lm.tau,
                    vcov. = vcovCluster(lm.tau,
                                        cluster = mydata.sub$respondent.ID))


## estimate nu (effect of photo ethnicity on choice for full sample)
mydata.sub <- na.omit(mydata.p.long)
lm.nu <- lm(choice ~ candA.ethnicity, weights = weight, data = mydata.sub)
tab.nu <- coeftest(lm.nu,
                   vcov. = vcovCluster(lm.nu,
                                       cluster = mydata.sub$respondent.ID))


## delta1 = tau - nu. The two estimates come from separate model fits, and
## the variance of the difference is taken as the sum of the two squared
## clustered SEs (i.e. the two estimates are treated as independent).
delta1.hat <- coef(lm.tau)[2] - coef(lm.nu)[2]
delta1.var <- tab.tau[2, "Std. Error"]^2 + tab.nu[2, "Std. Error"]^2
delta1.se <- sqrt(delta1.var)
delta1.z <- delta1.hat / delta1.se
## upper tail computed directly: 1 - pchisq() rounds to exactly 0 for very
## small p-values
delta1.pval <- pchisq(delta1.z^2, df = 1, lower.tail = FALSE)


## put estimates in a nice latex table
library(xtable)

tau.hat <- coef(lm.tau)[2]
tau.se <- tab.tau[2, "Std. Error"]
tau.z <- tau.hat / tau.se
tau.pval <- pchisq(tau.z^2, df = 1, lower.tail = FALSE)

nu.hat <- coef(lm.nu)[2]
nu.se <- tab.nu[2, "Std. Error"]
nu.z <- nu.hat / nu.se
nu.pval <- pchisq(nu.z^2, df = 1, lower.tail = FALSE)


estimands <- c("tau", "nu", "delta1")
estimates <- c(tau.hat, nu.hat, delta1.hat)
ses <- c(tau.se, nu.se, delta1.se)
pvals <- c(tau.pval, nu.pval, delta1.pval)


tab.out <- data.frame(estimand = estimands, estimate = estimates,
                      se = ses, pval = pvals)

## drop any row names so that only the estimand column labels the rows
rownames(tab.out) <- NULL
print(xtable(tab.out, digits = 3), include.rownames = FALSE)

cat("\n\n")





########################################################################
########################################################################
## now just the first matchup


## load the long (stacked) data
## (reloaded so that the recoding and subsetting below start from the data
##  as stored on disk; the code below expects these files to provide
##  mydata.p.long and mydata.w.long)
load("./PhotosLong-ALL.Rda")
load("./WordsLong-ALL.Rda")

## convert choice variable into numeric (1 if choice is "CandA", 0 otherwise)
mydata.w.long$choice <- as.numeric(mydata.w.long$choice == "CandA")
mydata.p.long$choice <- as.numeric(mydata.p.long$choice == "CandA")

## subset to just 1st matchup
mydata.w.long <- mydata.w.long[mydata.w.long$matchup.time == "FIRST",]
mydata.p.long <- mydata.p.long[mydata.p.long$matchup.time == "FIRST",]


###########################################################################
## Hypothesis 1 (and 4 and 5)

## estimate tau (effect of text ethnicity on choice, first matchup only)
## note: na.omit() drops every row that has a missing value in ANY column
mydata.sub <- na.omit(mydata.w.long)
lm.tau <- lm(choice ~ candA.ethnicity, weights = weight, data = mydata.sub)
## standard errors clustered on respondent
tab.tau <- coeftest(lm.tau,
                    vcov. = vcovCluster(lm.tau,
                                        cluster = mydata.sub$respondent.ID))


## estimate nu (effect of photo ethnicity on choice, first matchup only)
mydata.sub <- na.omit(mydata.p.long)
lm.nu <- lm(choice ~ candA.ethnicity, weights = weight, data = mydata.sub)
tab.nu <- coeftest(lm.nu,
                   vcov. = vcovCluster(lm.nu,
                                       cluster = mydata.sub$respondent.ID))


## delta1 = tau - nu. The two estimates come from separate model fits, and
## the variance of the difference is taken as the sum of the two squared
## clustered SEs (i.e. the two estimates are treated as independent).
delta1.hat <- coef(lm.tau)[2] - coef(lm.nu)[2]
delta1.var <- tab.tau[2, "Std. Error"]^2 + tab.nu[2, "Std. Error"]^2
delta1.se <- sqrt(delta1.var)
delta1.z <- delta1.hat / delta1.se
## upper tail computed directly: 1 - pchisq() rounds to exactly 0 for very
## small p-values
delta1.pval <- pchisq(delta1.z^2, df = 1, lower.tail = FALSE)


## put estimates in a nice latex table
library(xtable)

tau.hat <- coef(lm.tau)[2]
tau.se <- tab.tau[2, "Std. Error"]
tau.z <- tau.hat / tau.se
tau.pval <- pchisq(tau.z^2, df = 1, lower.tail = FALSE)

nu.hat <- coef(lm.nu)[2]
nu.se <- tab.nu[2, "Std. Error"]
nu.z <- nu.hat / nu.se
nu.pval <- pchisq(nu.z^2, df = 1, lower.tail = FALSE)


estimands <- c("tau", "nu", "delta1")
estimates <- c(tau.hat, nu.hat, delta1.hat)
ses <- c(tau.se, nu.se, delta1.se)
pvals <- c(tau.pval, nu.pval, delta1.pval)


tab.first.out <- data.frame(estimand = estimands, estimate = estimates,
                            se = ses, pval = pvals)

## drop any row names so that only the estimand column labels the rows
rownames(tab.first.out) <- NULL
print(xtable(tab.first.out, digits = 3), include.rownames = FALSE)

cat("\n\n")





###########################################################################
###########################################################################
## now just the 2nd through 6th matchups
## load the long (stacked) data
## (reloaded so that the recoding and subsetting below start from the data
##  as stored on disk; the code below expects these files to provide
##  mydata.p.long and mydata.w.long)
load("./PhotosLong-ALL.Rda")
load("./WordsLong-ALL.Rda")

## convert choice variable into numeric (1 if choice is "CandA", 0 otherwise)
mydata.w.long$choice <- as.numeric(mydata.w.long$choice == "CandA")
mydata.p.long$choice <- as.numeric(mydata.p.long$choice == "CandA")

## subset to just 2nd to 6th matchups
## (implemented as: every row whose matchup.time is not "FIRST")
mydata.w.long <- mydata.w.long[mydata.w.long$matchup.time != "FIRST",]
mydata.p.long <- mydata.p.long[mydata.p.long$matchup.time != "FIRST",]


###########################################################################
## Hypothesis 1 (and 4 and 5)

## estimate tau (effect of text ethnicity on choice, matchups after the first)
## note: na.omit() drops every row that has a missing value in ANY column
mydata.sub <- na.omit(mydata.w.long)
lm.tau <- lm(choice ~ candA.ethnicity, weights = weight, data = mydata.sub)
## standard errors clustered on respondent
tab.tau <- coeftest(lm.tau,
                    vcov. = vcovCluster(lm.tau,
                                        cluster = mydata.sub$respondent.ID))


## estimate nu (effect of photo ethnicity on choice, matchups after the first)
mydata.sub <- na.omit(mydata.p.long)
lm.nu <- lm(choice ~ candA.ethnicity, weights = weight, data = mydata.sub)
tab.nu <- coeftest(lm.nu,
                   vcov. = vcovCluster(lm.nu,
                                       cluster = mydata.sub$respondent.ID))


## delta1 = tau - nu. The two estimates come from separate model fits, and
## the variance of the difference is taken as the sum of the two squared
## clustered SEs (i.e. the two estimates are treated as independent).
delta1.hat <- coef(lm.tau)[2] - coef(lm.nu)[2]
delta1.var <- tab.tau[2, "Std. Error"]^2 + tab.nu[2, "Std. Error"]^2
delta1.se <- sqrt(delta1.var)
delta1.z <- delta1.hat / delta1.se
## upper tail computed directly: 1 - pchisq() rounds to exactly 0 for very
## small p-values
delta1.pval <- pchisq(delta1.z^2, df = 1, lower.tail = FALSE)


## put estimates in a nice latex table
library(xtable)

tau.hat <- coef(lm.tau)[2]
tau.se <- tab.tau[2, "Std. Error"]
tau.z <- tau.hat / tau.se
tau.pval <- pchisq(tau.z^2, df = 1, lower.tail = FALSE)

nu.hat <- coef(lm.nu)[2]
nu.se <- tab.nu[2, "Std. Error"]
nu.z <- nu.hat / nu.se
nu.pval <- pchisq(nu.z^2, df = 1, lower.tail = FALSE)


estimands <- c("tau", "nu", "delta1")
estimates <- c(tau.hat, nu.hat, delta1.hat)
ses <- c(tau.se, nu.se, delta1.se)
pvals <- c(tau.pval, nu.pval, delta1.pval)


tab.2to6.out <- data.frame(estimand = estimands, estimate = estimates,
                           se = ses, pval = pvals)

## drop any row names so that only the estimand column labels the rows
rownames(tab.2to6.out) <- NULL
print(xtable(tab.2to6.out, digits = 3), include.rownames = FALSE)


